Collecting WeChat Official Account Data with the Scrapy Framework: Cleverly Bypassing the Anti-Scraping Captcha (Part 2)

get_cookies.py
# coding=utf-8
import requests
from fake_useragent import UserAgent
from IP.free_ip import get_random_proxy

ua = UserAgent().random


def get_new_headers():
    # Build browser-like headers with a randomized User-Agent.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "User-Agent": ua
    }
    return headers


def get_new_cookies():
    # NOTE: the target URL and the rest of this function were garbled in the
    # original post; the Sogou WeChat search URL below and the proxy/cookie
    # handling are a plausible reconstruction, not verbatim.
    url = 'https://weixin.sogou.com/weixin?type=1&query=python'
    proxy = get_random_proxy()
    proxies = {'http': 'http://' + proxy} if proxy else None
    response = requests.get(url, headers=get_new_headers(), proxies=proxies)
    return response.cookies.get_dict()
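As a point of reference, here is a minimal sketch of how these two helpers could be wired into the spider's start_requests. The spider class, its name, and the example query URL are illustrative assumptions, not code from the original article:

import scrapy
from get_cookies import get_new_headers, get_new_cookies


class SougouSearchSpider(scrapy.Spider):
    name = 'sougou_search'  # assumed name, for illustration only

    def start_requests(self):
        # Example Sogou WeChat search URL; substitute your own query.
        url = 'https://weixin.sogou.com/weixin?type=1&query=python'
        yield scrapy.Request(url,
                             headers=get_new_headers(),
                             cookies=get_new_cookies(),
                             callback=self.parse)

    def parse(self, response):
        pass  # extraction logic goes here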
free_ip.py
# coding=utf-8
import requests

# Address of the local proxy pool service; left blank in the original post.
proxypool_url = ''


def get_random_proxy():
    # Ask the proxy pool for one random proxy, returned as "host:port" text.
    try:
        response = requests.get(proxypool_url)
        if response.status_code == 200:
            return response.text.strip()
    except requests.exceptions.ConnectionError:
        return None


if __name__ == '__main__':
    print(get_random_proxy())
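Assuming the pool answers with a bare "host:port" string (which is what get_random_proxy passes through), plugging it into a plain requests call looks like this; the target URL here is only an example:

import requests
from IP.free_ip import get_random_proxy

proxy = get_random_proxy()
if proxy:
    # Route both schemes through the HTTP proxy returned by the pool.
    proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
    response = requests.get('https://weixin.sogou.com/', proxies=proxies, timeout=10)
    print(response.status_code)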
settings.py
BOT_NAME = 'sougou_search_spider'

SPIDER_MODULES = ['sougou_search_spider.spiders']
NEWSPIDER_MODULE = 'sougou_search_spider.spiders'

# Sogou signals "suspected bot" with a 302 redirect to its captcha page.
# Keep redirects off so the 302 is not followed automatically...
REDIRECT_ENABLED = False
# ...and let 302 responses through to the spider callback for handling.
HTTPERROR_ALLOWED_CODES = [302]

ROBOTSTXT_OBEY = False
COOKIES_ENABLED = True

ITEM_PIPELINES = {
    'sougou_search_spider.pipelines.CsvSougouSearchSpiderPipeline': 300,
}
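With REDIRECT_ENABLED = False and HTTPERROR_ALLOWED_CODES = [302], the captcha redirect reaches the spider callback as a plain 302 response. A minimal sketch of how the rotation could be handled there, continuing the illustrative spider above (this wiring is an assumption, not code from the article):

def parse(self, response):
    if response.status == 302:
        # Captcha redirect detected: retry the same URL with fresh
        # headers and cookies instead of following the redirect.
        yield response.request.replace(
            headers=get_new_headers(),
            cookies=get_new_cookies(),
            dont_filter=True,  # allow re-crawling the same URL
        )
        return
    # Normal extraction for 200 responses continues here.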