获取网站相关信息（响应类型、响应头、Server、状态码、最终 URL）
import dns.resolver
import urllib2
# Fetch a page and print basic facts about the HTTP response:
# the response object's type, all headers, the Server header,
# the status code and the final URL.
# A timeout keeps the script from blocking forever on an unresponsive host.
resp = urllib2.urlopen('http://www.www.com', timeout=10)
try:
    print(type(resp))
    print(resp.headers)
    # .get() prints None instead of raising KeyError when the server
    # does not send a Server header.
    print(resp.headers.get('Server'))
    print(resp.getcode())
    print(resp.geturl())
finally:
    # Always release the connection, even if one of the prints fails.
    resp.close()
获取网页标题（title）和编码，并保存到 MongoDB
import mongo
import header
import pymongo
import random
import requests
import urllib2
from bs4 import BeautifulSoup
#----------------------------------------------------------------------
def url_info(url):
    """Fetch *url* and record its page title and encoding in MongoDB.

    The page is downloaded with ``requests`` (sending the headers returned
    by ``header.get_header()`` and using a random 5-10 second timeout) and
    parsed with BeautifulSoup.  The ``<title>`` text and the encoding
    reported by ``requests`` are upserted into ``mongo.ls_Info`` on the
    document whose ``URL`` field equals *url*.

    Args:
        url: Address of the page to inspect.

    Returns:
        None.  The title is printed.  If the page has no ``<title>``
        element a notice is printed and nothing is stored.  Failures while
        printing or saving are printed and swallowed (best effort).

    Raises:
        Errors from downloading or parsing the page are not caught here
        and propagate to the caller.
    """
    # Send the project's request headers with the actual fetch; they were
    # previously built into a urllib2.Request that was never used.
    html_url = requests.get(url, headers=header.get_header(),
                            timeout=random.randint(5, 10))
    soup = BeautifulSoup(html_url.content, 'html.parser')

    title_tag = soup.title
    if title_tag is None:
        # No <title> element: report it explicitly instead of relying on
        # an AttributeError being swallowed further down.
        print('page has no <title> element')
        return

    title_url = title_tag.string
    encoding_url = html_url.encoding
    try:
        print(title_url)
        # NOTE(review): Collection.update() is deprecated in newer pymongo
        # releases; kept because the installed version is not visible here.
        mongo.ls_Info.update({"URL": url},
                             {"$set": {'title': title_url,
                                       'encoding': encoding_url}},
                             upsert=True)
    except Exception as e:
        # Best effort: report the failure and carry on.
        print(str(e))