1. gevent + socket 扫描端口:
#encoding=utf-8
#author: walker
#date: 2014-07-16
#function: 使用协程(异步)方式扫描1000个端口
import gevent.monkey
gevent.monkey.patch_socket()
import gevent, socket, sys, time
def task(addr, timeout=100):
    """Attempt a TCP connection to ``addr`` and report the port if it is open.

    Args:
        addr: ``(host, port)`` tuple handed unchanged to ``socket.connect``.
        timeout: Seconds to wait for the connection before giving up.
            Defaults to 100, the value that used to be hard-coded here.

    Returns:
        None. ``Port <n> is open`` is printed when the connection succeeds;
        a failed connection produces no output.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(timeout)
    try:
        sock.connect(addr)
    except (socket.error, OverflowError):
        # Refused, unreachable or timed-out connections (socket.error) and
        # out-of-range port numbers (OverflowError) simply mean "not open".
        # Anything else, e.g. KeyboardInterrupt, is allowed to propagate.
        pass
    else:
        print('Port ' + str(addr[1]) + ' is open')
    finally:
        # Always release the descriptor, whether or not the connect worked.
        sock.close()
# Scan 1000 ports
def asynchronous():
    """Spawn one ``task`` greenlet per port 0-999 on 127.0.0.1 and wait for all."""
    greenlets = [
        gevent.spawn(task, ('127.0.0.1', port))
        for port in range(0, 1000)
    ]
    # Block until every spawned greenlet has finished.
    gevent.joinall(greenlets)
# Run the scan once and print how long it took in wall-clock seconds.
t0 = time.time()
asynchronous()
t1 = time.time()
elapsed = t1 - t0
print('time: {}s'.format(elapsed))
2. Trip + Requests 实现爬虫代理验证:
import re
import trip
def get_proxies(number=10):
    """Fetch proxy candidates from the 89ip.cn API and print them.

    Written as a generator: it yields the ``trip.get`` request and expects
    to be resumed with the response object.

    Args:
        number: Value interpolated into the ``tqsl`` query parameter
            (presumably the number of proxies requested -- confirm against
            the API).
    """
    r = yield trip.get('http://www.89ip.cn/apijk/' +
        '?&tqsl=%s&sxa=&sxb=&tta=&ports=&ktip=&cf=1' % number)
    # Raw string with an escaped dot: the octet separator must be a literal
    # '.', not "any character", so only dotted ``ip:port`` pairs are matched.
    p = re.findall(r'((?:\d{1,3}\.){3}\d{1,3}:\d+)', r.text)
    print(p)
# Hand the generator function to trip's runner (presumably drives it to
# completion on trip's event loop -- see the trip documentation).
trip.run(get_proxies)
import re, time
import requests, trip
@trip.coroutine
def get_proxies(number=10):
    """Fetch proxy candidates from the 89ip.cn API.

    Args:
        number: Value interpolated into the ``tqsl`` query parameter
            (presumably the number of proxies requested -- confirm against
            the API).

    Returns:
        Via ``trip.Return``: a list of ``ip:port`` strings found in the
        response text.
    """
    r = yield trip.get('http://www.89ip.cn/apijk/' +
        '?&tqsl=%s&sxa=&sxb=&tta=&ports=&ktip=&cf=1' % number)
    # Raw string with an escaped dot: the octet separator must be a literal
    # '.', not "any character", so only dotted ``ip:port`` pairs are matched.
    p = re.findall(r'((?:\d{1,3}\.){3}\d{1,3}:\d+)', r.text)
    raise trip.Return(p)
@trip.coroutine
def test_proxy(proxy):
    """Check whether ``proxy`` can relay a request to httpbin.org.

    Args:
        proxy: Proxy address used for both the ``http`` and ``https`` schemes.

    Returns:
        Via ``trip.Return``: ``proxy`` itself when the reply text mentions
        ``httpbin.org``. On any failure the coroutine finishes without a
        value (None).
    """
    proxy_map = {'http': proxy, 'https': proxy}
    try:
        reply = yield trip.get('http://httpbin.org/get', timeout=5,
                               proxies=proxy_map)
        if 'httpbin.org' not in reply.text:
            raise Exception('Invalid reply')
    except Exception:
        # Request errors and invalid replies both mark the proxy as unusable.
        return
    raise trip.Return(proxy)
def main():
    """Fetch 100 proxy candidates, test them all, and print the working ones.

    Written as a generator to be driven by ``trip.run``: it yields the
    ``get_proxies`` call and then a list with one ``test_proxy`` call per
    candidate, and expects to be resumed with a list of results.
    """
    proxies = yield get_proxies(100)
    results = yield [test_proxy(p) for p in proxies]
    # Build a real list of the truthy results: on Python 3 ``filter`` returns
    # a lazy object, so printing it would show its repr, not the proxies.
    print([proxy for proxy in results if proxy])
# Run the proxy check once and print the elapsed wall-clock seconds.
start_time = time.time()
trip.run(main)
elapsed = time.time() - start_time
print(elapsed)