# Requires a running Tor Browser; the exit IP is switched dynamically.
import socket,socks,requests
from stem import Signal
from stem.control import Controller
import time
from lxml import etree
# Local ports used to talk to Tor: 9151 is the control port, 9150 the SOCKS5
# proxy (the values Tor Browser listens on by default).
TOR_CONTROL_PORT = 9151
TOR_SOCKS_PORT = 9150
# Seconds to wait on any single HTTP request; without a timeout a stalled Tor
# circuit would block the script forever.
REQUEST_TIMEOUT = 30

# One URL per result page of the Douban Top 250 list (25 movies per page).
urls = ['https://movie.douban.com/top250?start={}&filter='.format(i) for i in range(0, 250, 25)]
headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'}

# The control connection is opened BEFORE socket.socket is replaced below, so
# it is a direct local connection rather than one tunnelled through the SOCKS
# proxy.  The context manager closes it when the crawl finishes or fails.
with Controller.from_port(port=TOR_CONTROL_PORT) as controller:
    controller.authenticate()

    # From here on every new socket (and therefore every `requests` call)
    # is routed through the Tor SOCKS5 proxy.
    socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", TOR_SOCKS_PORT)
    socket.socket = socks.socksocket

    for n, url in enumerate(urls, start=1):
        # Report the exit IP currently in use; strip the trailing newline
        # the service appends to its response.
        current_ip = requests.get("http://checkip.amazonaws.com", timeout=REQUEST_TIMEOUT).text.strip()
        print("第{}次更新的IP:".format(n), current_ip)

        started = time.perf_counter()
        html = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        selector = etree.HTML(html.text)
        # Each movie title block is <div class="hd"><a href="..."> on the page.
        movie_hrefs = selector.xpath('//div[@class="hd"]/a/@href')
        print(movie_hrefs)
        print('花费的时间:', time.perf_counter() - started)

        # Switch to a new IP.  Tor rate-limits NEWNYM, so wait until the
        # signal will actually be honoured before sending it.
        time.sleep(controller.get_newnym_wait())
        controller.signal(Signal.NEWNYM)